median_income = read_excel("./data/MedianZIP-3.xlsx") %>%
janitor::clean_names() %>%
rename(zipcode = zip)
Data resources: https://www.psc.isr.umich.edu/dis/census/Features/tract2zip/ It is the data of 2010
May_newcases = read_csv("./data/May_newcases")
## Parsed with column specification:
## cols(
## X1 = col_double(),
## zipcode = col_double(),
## day = col_character(),
## positive = col_double(),
## total = col_double(),
## zcta_cum_perc_pos = col_double(),
## neighborhood_name = col_character(),
## borough_group = col_character(),
## covid_case_rate = col_double(),
## pop_denominator = col_double(),
## covid_death_count = col_double(),
## covid_death_rate = col_double(),
## percent_positive = col_double(),
## newcases_day = col_double()
## )
New_York_City_zip = May_newcases %>%
distinct(zipcode)
median_income_nyc = left_join(New_York_City_zip, median_income)
## Joining, by = "zipcode"
#write.csv(median_income_nyc,file="data/predictor_data/median_income_nyc.csv")
race_by_zipcode = read_excel("./data/race_by_zipcode.xlsx",skip = 1) %>%
select(-id) %>%
janitor::clean_names() %>%
separate(geographic_area_name,into = c("ZCTA", "zipcode_state"), sep = 6) %>%
separate(zipcode_state,into = c("zipcode", "state"),sep = 5) %>%
select(-ZCTA, -state) %>%
mutate(zipcode = as.numeric(zipcode))
race_by_zipcode_nyc = left_join(New_York_City_zip, race_by_zipcode)
## Joining, by = "zipcode"
colnames(race_by_zipcode_nyc) <- str_replace(colnames(race_by_zipcode_nyc), "total_", "")
#write.csv(race_by_zipcode_nyc,file="data/predictor_data/race_by_zipcode_nyc.csv")
sex_by_age = read_excel("./data/sex_by_age.xlsx", skip = 1) %>%
janitor::clean_names() %>%
separate(geographic_area_name,into = c("ZCTA", "zipcode_state"), sep = 6) %>%
separate(zipcode_state,into = c("zipcode", "state"),sep = 5) %>%
select(-ZCTA, -state, -id) %>%
mutate(zipcode = as.numeric(zipcode))
sex_by_age_zipcode_nyc = left_join(New_York_City_zip, sex_by_age)
## Joining, by = "zipcode"
colnames(sex_by_age_zipcode_nyc) <- str_replace(colnames(sex_by_age_zipcode_nyc), "total_", "")
male = sex_by_age_zipcode_nyc %>%
select(zipcode, starts_with("male")) %>%
mutate(sex = rep("male", nrow(.))) %>%
rename(total = male)
colnames(male) <- str_replace(colnames(male), "male_", "")
colnames(male) <- str_replace(colnames(male), "_years", "")
female = sex_by_age_zipcode_nyc %>%
select(zipcode, starts_with("female")) %>%
mutate(sex = rep("female", nrow(.)))%>%
rename(total = female)
colnames(female) <- str_replace(colnames(male), "female_", "")
colnames(female) <- str_replace(colnames(male), "_years", "")
sex_age_zipcode_nyc = rbind(male, female) %>%
select(zipcode, total, sex, everything())
#write.csv(sex_age_zipcode_nyc,file="data/predictor_data/sex_by_age_zipcode_nyc.csv")
household = read_excel("./data/household.xlsx", skip = 1)%>%
janitor::clean_names() %>%
separate(geographic_area_name,into = c("ZCTA", "zipcode_state"), sep = 6) %>%
separate(zipcode_state,into = c("zipcode", "state"),sep = 5) %>%
select(-ZCTA, -state, -id) %>%
mutate(zipcode = as.numeric(zipcode))
household_zipcode_nyc = left_join(New_York_City_zip,household)
## Joining, by = "zipcode"
colnames(household_zipcode_nyc) <- str_replace(colnames(household_zipcode_nyc), "total_", "")
colnames(household_zipcode_nyc) <- str_replace(colnames(household_zipcode_nyc), "_person_household", "")
# write.csv(household_zipcode_nyc,file="data/predictor_data/household_zipcode_nyc.csv")
latitude and longitude.
zipcode_map = read_xlsx("./data/us-zip-code-latitude-and-longitude.xlsx",skip = 1) %>%
janitor::clean_names() %>%
rename(zipcode = zip)
zipcode_map_nyc = left_join(New_York_City_zip, zipcode_map)
## Joining, by = "zipcode"
zipcode_map_nyc = zipcode_map_nyc %>% select(zipcode,latitude,longitude)
r <- GET('http://data.beta.nyc//dataset/0ff93d2d-90ba-457c-9f7e-39e47bf2ac5f/resource/35dd04fb-81b3-479b-a074-a27a37888ce7/download/d085e2f8d0b54d4590b1e7d1f35594c1pediacitiesnycneighborhoods.geojson')
nyc_neighborhoods <- readOGR(content(r,'text'), 'OGRGeoJSON', verbose = F)
## No encoding supplied: defaulting to UTF-8.
summary(nyc_neighborhoods)
## Object of class SpatialPolygonsDataFrame
## Coordinates:
## min max
## x -74.25559 -73.70001
## y 40.49613 40.91553
## Is projected: FALSE
## proj4string : [+proj=longlat +datum=WGS84 +no_defs]
## Data attributes:
## neighborhood boroughCode borough
## Jamaica Bay : 16 1:38 Bronx :75
## Pelham Islands : 14 2:74 Brooklyn :71
## Broad Channel : 10 3:67 Manhattan :37
## Marine Park : 3 4:77 Queens :73
## Pelham Bay Park: 3 5:54 Staten Island:54
## Bayswater : 2
## (Other) :262
## X.id
## http://nyc.pediacities.com/Resource/Neighborhood/Jamaica_Bay : 16
## http://nyc.pediacities.com/Resource/Neighborhood/Pelham_Islands : 14
## http://nyc.pediacities.com/Resource/Neighborhood/Broad_Channel : 10
## http://nyc.pediacities.com/Resource/Neighborhood/Marine_Park : 3
## http://nyc.pediacities.com/Resource/Neighborhood/Pelham_Bay_Park: 3
## http://nyc.pediacities.com/Resource/Neighborhood/Bayswater : 2
## (Other) :262
leaflet(nyc_neighborhoods) %>%
addTiles() %>%
addPolygons(popup = ~neighborhood) %>%
addProviderTiles("CartoDB.Positron")
<<<<<<< HEAD
=======
>>>>>>> b41495b526acf9508648699090a79ee660aa6228
Use Median income to draw the map
library(sf)
## Linking to GEOS 3.7.2, GDAL 2.4.2, PROJ 5.2.0
library(raster)
##
## Attaching package: 'raster'
## The following object is masked from 'package:plotly':
##
## select
## The following object is masked from 'package:dplyr':
##
## select
## The following object is masked from 'package:tidyr':
##
## extract
library(dplyr)
library(spData)
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(tmap) # for static and interactive maps
library(leaflet) # for interactive maps
library(ggplot2) # tidyverse data visualization package
library(shiny) # for web applications
median_income_nyc
## # A tibble: 177 x 4
## zipcode median mean pop
## <dbl> <dbl> <dbl> <dbl>
## 1 10001 71245. 123113. 17678
## 2 10002 30844. 46259. 70878
## 3 10003 89999. 139331. 53609
## 4 10004 110184. 156683. 1271
## 5 10005 115133. 163763. 1517
## 6 10006 111220 156776 972
## 7 10007 145459. 256236. 3520
## 8 10009 56615. 78138. 56975
## 9 10010 93702. 137106. 27322
## 10 10011 92359. 160937. 45899
## # … with 167 more rows
m <- leaflet() %>% setView(lng = -73.99653, lat = 40.75074, zoom = 12)
m %>% addTiles()
<<<<<<< HEAD
=======
>>>>>>> b41495b526acf9508648699090a79ee660aa6228
r <- GET('http://data.beta.nyc//dataset/0ff93d2d-90ba-457c-9f7e-39e47bf2ac5f/resource/35dd04fb-81b3-479b-a074-a27a37888ce7/download/d085e2f8d0b54d4590b1e7d1f35594c1pediacitiesnycneighborhoods.geojson')
nyc_neighborhoods <- readOGR(content(r,'text'), 'OGRGeoJSON', verbose = F)
## No encoding supplied: defaulting to UTF-8.
leaflet(nyc_neighborhoods) %>%
addTiles() %>%
addPolygons(stroke = FALSE, smoothFactor = 0.2, fillOpacity = 1,popup = ~neighborhood) %>%
addProviderTiles("CartoDB.Positron")
<<<<<<< HEAD
=======
>>>>>>> b41495b526acf9508648699090a79ee660aa6228
data0614 = read.csv("./data/June/0614/data-by-modzcta0614.csv") %>%
drop_na()%>% janitor::clean_names() %>%
rename(positive = covid_case_count,
MODZCTA = modified_zcta) %>%
dplyr::select(MODZCTA,positive)
data0614_death = read.csv("./data/June/0614/data-by-modzcta0614.csv") %>%
drop_na()%>% janitor::clean_names() %>%
rename(positive = covid_case_count,
MODZCTA = modified_zcta) %>%
dplyr::select(MODZCTA,covid_death_count)
spdf = rgdal::readOGR("Geography-resources/MODZCTA_2010_WGS1984.geo.json")
## OGR data source with driver: GeoJSON
## Source: "/Users/ziqizhou/Desktop/MSPH_1st_Year/P8105_Data_Science/Git/NYC_covid-19.github.io/Geography-resources/MODZCTA_2010_WGS1984.geo.json", layer: "MODZCTA_2010_WGS1984.geo"
## with 178 features
## It has 2 fields
spdf@data
## MODZCTA label
## 0 10001 10001, 10118
## 1 10002 10002
## 2 10003 10003
## 3 10004 10004
## 4 10005 10005
## 5 10006 10006
## 6 10007 10007
## 7 10009 10009
## 8 10010 10010
## 9 10011 10011
## 10 10012 10012
## 11 10013 10013
## 12 10014 10014
## 13 10016 10016
## 14 10017 10017
## 15 10018 10018
## 16 10019 10019, 10020
## 17 10021 10021
## 18 10022 10022
## 19 10023 10023
## 20 10024 10024
## 21 10025 10025
## 22 10026 10026
## 23 10027 10027
## 24 10028 10028
## 25 10029 10029
## 26 10030 10030
## 27 10031 10031
## 28 10032 10032
## 29 10033 10033
## 30 10034 10034
## 31 10035 10035
## 32 10036 10036
## 33 10037 10037
## 34 10038 10038
## 35 10039 10039
## 36 10040 10040
## 37 10044 10044
## 38 10065 10065
## 39 10069 10069
## 40 10075 10075, 10162
## 41 10128 10128
## 42 10280 10280
## 43 10282 10282
## 44 10301 10301
## 45 10302 10302
## 46 10303 10303
## 47 10304 10304
## 48 10305 10305
## 49 10306 10306
## 50 10307 10307
## 51 10308 10308
## 52 10309 10309
## 53 10310 10310
## 54 10312 10312
## 55 10314 10314
## 56 10451 10451
## 57 10452 10452
## 58 10453 10453
## 59 10454 10454
## 60 10455 10455
## 61 10456 10456
## 62 10457 10457
## 63 10458 10458
## 64 10459 10459
## 65 10460 10460
## 66 10461 10461
## 67 10462 10462
## 68 10463 10463
## 69 10464 10464
## 70 10465 10465
## 71 10466 10466
## 72 10467 10467
## 73 10468 10468
## 74 10469 10469
## 75 10470 10470
## 76 10471 10471
## 77 10472 10472
## 78 10473 10473
## 79 10474 10474
## 80 10475 10475
## 81 11004 11004, 11005
## 82 11101 11101
## 83 11102 11102
## 84 11103 11103
## 85 11104 11104
## 86 11105 11105
## 87 11106 11106
## 88 11109 11109
## 89 11201 11201
## 90 11203 11203
## 91 11204 11204
## 92 11205 11205
## 93 11206 11206
## 94 11207 11207
## 95 11208 11208
## 96 11209 11209
## 97 11210 11210
## 98 11211 11211, 11249
## 99 11212 11212
## 100 11213 11213
## 101 11214 11214
## 102 11215 11215
## 103 11216 11216
## 104 11217 11217, 11243
## 105 11218 11218
## 106 11219 11219
## 107 11220 11220
## 108 11221 11221
## 109 11222 11222
## 110 11223 11223
## 111 11224 11224
## 112 11225 11225
## 113 11226 11226
## 114 11228 11228
## 115 11229 11229
## 116 11230 11230
## 117 11231 11231
## 118 11232 11232
## 119 11233 11233
## 120 11234 11234
## 121 11235 11235
## 122 11236 11236
## 123 11237 11237
## 124 11238 11238
## 125 11239 11239
## 126 11354 11354
## 127 11355 11355
## 128 11356 11356
## 129 11357 11357
## 130 11358 11358
## 131 11360 11360
## 132 11361 11361
## 133 11362 11362
## 134 11363 11363
## 135 11364 11364
## 136 11365 11365
## 137 11366 11366
## 138 11367 11367
## 139 11368 11368
## 140 11369 11369
## 141 11370 11370
## 142 11372 11372
## 143 11373 11373
## 144 11374 11374
## 145 11375 11375
## 146 11377 11377
## 147 11378 11378
## 148 11379 11379
## 149 11385 11385
## 150 11411 11411
## 151 11412 11412
## 152 11413 11413
## 153 11414 11414
## 154 11415 11415
## 155 11416 11416
## 156 11417 11417
## 157 11418 11418
## 158 11419 11419
## 159 11420 11420
## 160 11421 11421
## 161 11422 11422
## 162 11423 11423
## 163 11426 11426
## 164 11427 11427
## 165 11428 11428
## 166 11429 11429
## 167 11432 11432
## 168 11433 11433
## 169 11434 11434
## 170 11435 11435
## 171 11436 11436
## 172 11691 11691
## 173 11692 11692
## 174 11693 11693
## 175 11694 11694
## 176 11697 11697
## 177 99999
posi_map = geo_join(spdf,data0614,"MODZCTA","MODZCTA")
pal <- colorNumeric("Greens", domain=posi_map$positive)
# Getting rid of rows with NA values
posi_map <- subset(posi_map, !is.na(positive))
# Setting up the pop up text
popup_sb <- paste0("Positive Cases: ", as.character(posi_map$positive),
" Neighborhood: ", as.character(posi_map$neighborhood_name),
" Boro: ",as.character(posi_map$borough_group),sep = ",")
# Mapping it with the new tiles CartoDB.Positron
leaflet() %>%
addProviderTiles("CartoDB.Positron") %>%
setView(lng = -73.99653, lat = 40.75074, zoom = 12) %>%
addPolygons(data = posi_map ,
fillColor = ~pal(posi_map$positive),
fillOpacity = 0.7,
weight = 0.2,
smoothFactor = 0.2,
popup = ~popup_sb) %>%
addLegend(pal = pal,
values = posi_map$positive,
position = "bottomright",
title = "Cumulative cases")
<<<<<<< HEAD
=======
>>>>>>> b41495b526acf9508648699090a79ee660aa6228
median_income_map = geo_join(spdf, median_income_nyc,"MODZCTA", "zipcode")
pal_med = colorNumeric(palette="YlOrRd", domain=median_income_map@data$median, na.color="transparent")
median_income_map = subset(median_income_map, !is.na(median))
popup_medi_sb <- paste0("Median Income: ", as.character(median_income_map$median))
# Mapping it with the new tiles CartoDB.Positron
leaflet() %>%
addProviderTiles("CartoDB.Positron") %>%
setView(lng = -73.99653, lat = 40.75074, zoom = 12) %>%
addPolygons(data = median_income_map ,
fillColor = ~pal_med(median_income_map$median),
fillOpacity = 0.7,
weight = 0.2,
smoothFactor = 0.2,
popup = ~popup_medi_sb) %>%
addLegend(pal = pal_med,
values = median_income_map$median,
position = "bottomright",
title = "Median Annual Income")
<<<<<<< HEAD
=======
>>>>>>> b41495b526acf9508648699090a79ee660aa6228
death_map = geo_join(spdf,data0614_death,"MODZCTA","MODZCTA")
pal_dea <- colorNumeric("Reds", domain=death_map$covid_death_count)
# Getting rid of rows with NA values
death_map <- subset(death_map, !is.na(covid_death_count))
# Setting up the pop up text
popup_dea_sb <- paste0("Death Count: ", as.character(death_map$covid_death_count))
# Mapping it with the new tiles CartoDB.Positron
leaflet() %>%
addProviderTiles("CartoDB.Positron") %>%
setView(lng = -73.99653, lat = 40.75074, zoom = 12) %>%
addPolygons(data = death_map ,
fillColor = ~pal_dea(death_map$covid_death_count),
fillOpacity = 0.7,
weight = 0.2,
smoothFactor = 0.2,
popup = ~popup_dea_sb) %>%
addLegend(pal = pal_dea,
values = death_map$covid_death_count,
position = "bottomright",
title = "Death Count")
<<<<<<< HEAD
=======
>>>>>>> b41495b526acf9508648699090a79ee660aa6228
library(broom)
spdf_fortified <- tidy(spdf)
## Regions defined for each Polygons
# Plot it
ggplot() +
geom_polygon(data = spdf_fortified, aes( x = long, y = lat, group = group), fill="#69b3a2", color="white") +
theme_void() +
coord_map()
